Project problem

Using the values generated from COVID-19 cases modeling predict the number of ICU/ventilator facilities required and predict the cost of the expansion based on some arbitrary value.

Read data into Rstudio

First step is to be able to model Covid-19 patient growth

# Import library 
library(ggplot2)
library(tidyr)
library(tseries)
## Warning: package 'tseries' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
covid = read.csv("COVID-19_Daily_Cases__Deaths__and_Hospitalizations.csv")

coluNames = c("Date","Daily Total Cases","Daily Total Death","Daily Hospitalized cases","Case 0-17","Case 18-29","Case 30-39","Case 40-49","Case 50-59","Case 60-69","Case 70-79","Case 80+","Case Age unknown")
covid_age = covid[,1:13]
colnames(covid_age) <- coluNames

covid_age$Date = as.Date(covid_age$Date,"%m/%d/%y")
covid_age <- covid_age[order(covid_age$Date, decreasing = FALSE),]

Get some basic plots

graph1 <- ggplot(covid_age, aes(x = Date, y = `Daily Total Cases`)) + geom_line()
graph1
## Warning: Removed 1 row(s) containing missing values (geom_path).

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
describe(covid_age)
## covid_age 
## 
##  13  Variables      190  Observations
## --------------------------------------------------------------------------------
## Date 
##          n    missing   distinct       Info       Mean        Gmd        .05 
##        189          1        189          1 2020-06-03      63.33 2020-03-10 
##        .10        .25        .50        .75        .90        .95 
## 2020-03-19 2020-04-17 2020-06-03 2020-07-20 2020-08-17 2020-08-26 
## 
## lowest : 2020-03-01 2020-03-02 2020-03-03 2020-03-04 2020-03-05
## highest: 2020-09-01 2020-09-02 2020-09-03 2020-09-04 2020-09-05
## --------------------------------------------------------------------------------
## Daily Total Cases 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0      165        1    384.8    323.2      7.7     76.4 
##      .25      .50      .75      .90      .95 
##    172.0    325.5    467.5    879.9   1040.2 
## 
## lowest :    0    1    3    5   11, highest: 1158 1173 1284 1323 1471
## --------------------------------------------------------------------------------
## Daily Total Death 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0       52    0.996    15.23    17.17     0.00     0.90 
##      .25      .50      .75      .90      .95 
##     2.00     7.00    25.75    40.10    45.55 
## 
## lowest :  0  1  2  3  4, highest: 51 52 53 56 57
## --------------------------------------------------------------------------------
## Daily Hospitalized cases 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      185        5       90    0.999    61.55    64.09      8.2     12.0 
##      .25      .50      .75      .90      .95 
##     16.0     25.0    114.0    160.0    168.0 
## 
## lowest :   2   3   4   6   8, highest: 181 182 198 202 355
## --------------------------------------------------------------------------------
## Case 0-17 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0       68    0.999    25.76    24.09     0.00     1.90 
##      .25      .50      .75      .90      .95 
##     7.00    21.00    39.75    57.10    64.00 
## 
## lowest :  0  1  2  3  4, highest: 70 75 78 81 85
## --------------------------------------------------------------------------------
## Case 18-29 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0      122        1    83.17    64.92      0.9     15.0 
##      .25      .50      .75      .90      .95 
##     36.0     76.0    114.5    171.6    206.6 
## 
## lowest :   0   2   3   4   6, highest: 224 232 234 236 247
## --------------------------------------------------------------------------------
## Case 30-39 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0      110        1    70.38    58.57     1.00    13.80 
##      .25      .50      .75      .90      .95 
##    29.25    62.50    87.00   163.10   196.20 
## 
## lowest :   0   1   2   3   5, highest: 222 223 224 230 232
## --------------------------------------------------------------------------------
## Case 40-49 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0      110        1    66.05    60.24     2.00     9.90 
##      .25      .50      .75      .90      .95 
##    29.00    49.00    85.75   158.60   200.40 
## 
## lowest :   0   1   2   3   5, highest: 223 228 232 243 253
## --------------------------------------------------------------------------------
## Case 50-59 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0      102        1    59.66    56.95      1.0      6.9 
##      .25      .50      .75      .90      .95 
##     24.0     40.0     83.0    145.1    183.8 
## 
## lowest :   0   1   3   4   5, highest: 205 206 208 241 247
## --------------------------------------------------------------------------------
## Case 60-69 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0       91        1    41.28    40.68     0.45     5.00 
##      .25      .50      .75      .90      .95 
##    16.00    25.50    61.00    95.10   120.75 
## 
## lowest :   0   1   2   3   4, highest: 151 153 156 158 245
## --------------------------------------------------------------------------------
## Case 70-79 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0       59    0.999    21.99    21.94     0.45     2.90 
##      .25      .50      .75      .90      .95 
##     7.25    13.00    35.00    51.20    62.00 
## 
## lowest :   0   1   2   3   4, highest:  69  70  74  99 162
## --------------------------------------------------------------------------------
## Case 80+ 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      190        0       51    0.997    16.44    19.45        0        1 
##      .25      .50      .75      .90      .95 
##        4        8       24       41       52 
## 
## lowest :   0   1   2   3   4, highest:  68  90  94 104 167
## --------------------------------------------------------------------------------
## Case Age unknown 
##        n  missing distinct     Info     Mean      Gmd 
##      190        0        3    0.191  0.07368   0.1387 
##                             
## Value          0     1     2
## Frequency    177    12     1
## Proportion 0.932 0.063 0.005
## --------------------------------------------------------------------------------
for (i in coluNames){
  titles = paste("Graph of", i, sep = " ")
  plot(covid_age[,i], ylab = i, main = titles)
}

for (i in coluNames){
  titles = paste("Graph of", i, sep = " ")
  boxplot(covid_age[,i],ylab=i,main=titles)
}

# Based on Statistica statistics we can give a rough estimate of of how many people are admitted into hospital. 

calcHospital <- function(percentage, name_Case, dataset){
  dataset = dataset[name_Case]
  newData = dataset*percentage
  return(newData)
}

s = calcHospital(0.056, "Daily Total Cases", covid_age)
# Get like a weekly average setting the counter to 7 
calcWeek <- function(name_case, dataset){
  x = 0 
  y = 0 
  var = c()
  z = 1
  while (y != 190){
    y = y+1 
    if (y%%7 == 0){
      temp = sum(dataset[x:y,name_case])
      var[z] = temp
      z = z+1
      x = y
    }
  }
  return(var)
}

week <- calcWeek("Daily Total Cases",covid_age)
week
##  [1]    7  119  874 2380 3164 4017 4727 7117 7883 6772 5953 5323 3455 1963 1617
## [16] 1407 1463 1541 1876 1938 2102 2276 2344 2453 2702 2572 1583
weekHos <- calcWeek("Daily Hospitalized cases",covid_age)
plot(week, type="l", ylab="Cases Per Week",xlab="Week",main = "Covid Cases by Weeks")

#lines(weekHos, type="l")
#plot(weekHos, type="l", ylab="Cases Per Week",xlab="Week",main = "Covid Cases Hospitalized by Weeks")

cumulcase <- cumsum(covid_age$`Daily Total Cases`)
plot(x= as.Date(covid_age$Date), cumulcase)

# COVID-19 rates of increase both in cases and hospitalization 
calc_Rate <- function(name_case, dataset){
  x = 1
  y = 2
  count = 1
  set = c()
  while (!(y > 190)){
    temp = (dataset[y,name_case] - dataset[x,name_case])/dataset[x,name_case]
    set[count] = temp 
    count = count + 1 
    x = x+1; y = y+1
  }
  return(set)
}
r = calc_Rate("Daily Total Cases",covid_age)
r[is.na(r)] <- 0
r[is.infinite(r)] <- 0
print(sum(r)/length(r))
## [1] 0.8713212
boxplot(r, main="Rate % of Growth from Previous day to Next Day", ylab = "Rate %", ylim=c(-3,3))

library(forecast)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Explanatory data analysis of data 

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Trial run of forecasting (Start with train and test split )
data = subset(covid_age, select = c("Date", "Daily Total Cases"))

myts = ts(week, start=decimal_date(as.Date("2020-03-01")), frequency = 52)
plot(myts)

autoplot(myts)

# test = seq(from = as.Date(“2020-03-01”), to = as.Date(“2020-09-05”), by= 7)

myts = data.frame(myts, test)

plot(myts\(test,myts\)myts)

Below are all attempts of forecasting

# Conducting the Augmented Dickey-Fuller test for unit root 
#detach("package:aTSA", unload=TRUE)
library(tseries)

print("Weekly timeseries data")
## [1] "Weekly timeseries data"
adf.test(myts,alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  myts
## Dickey-Fuller = -2.8588, Lag order = 2, p-value = 0.2446
## alternative hypothesis: stationary
print("Daily new cases timeseries data")
## [1] "Daily new cases timeseries data"
adf.test(covid_age$`Daily Total Cases`,alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  covid_age$`Daily Total Cases`
## Dickey-Fuller = -1.8822, Lag order = 5, p-value = 0.6255
## alternative hypothesis: stationary
acf(diff(myts))

acf(covid_age$`Daily Total Cases`)

# Forecasting of weekly 
fit = auto.arima(week[1:24], seasonal=FALSE)
#plot(fit)
arimafor <- forecast(fit,h=3)
plot(arimafor)
lines(week)

print("Accuracy")
## [1] "Accuracy"
accuracy(arimafor,week[25:27])
##                      ME     RMSE      MAE        MPE      MAPE      MASE
## Training set   30.89061 621.6047 412.4783 -524.77395 537.69277 0.6161191
## Test set     -456.38457 756.5455 524.6341  -28.28803  30.81392 0.7836463
##                     ACF1
## Training set -0.04899643
## Test set              NA
week[27]
## [1] 1583
range(arimafor$mean[3])
## [1] 2877.291 2877.291
mean(arimafor$mean)/7
## [1] 391.7216
# Forecasting of daily 
fit2 = auto.arima(covid_age$`Daily Total Cases`[1:169], seasonal = FALSE)
arimafor2 <- forecast(fit2,h=21)
plot(arimafor2)
lines(covid_age$`Daily Total Cases`)

accuracy(arimafor2,covid_age$`Daily Total Cases`[170:190])
##                    ME     RMSE       MAE          MPE       MAPE      MASE
## Training set 3.028713 136.9559  97.34124    -4.628462   34.51657 0.8468401
## Test set     8.218577 145.8876 129.25212 -1351.584221 1384.12906 1.1244553
##                     ACF1
## Training set -0.01091548
## Test set              NA
covid_age$`Daily Total Cases`[190]
## [1] 129
arimafor2$mean[10]
## [1] 276.4378
mean(arimafor2$mean)
## [1] 288.6386

So the plan is to use a variety of methods to predict/forecast the number of COVID-19 cases and deaths and hospitalized cases.

Once we get the numbers, we will use that to calculate the difference between actual number and predicted/forecasted numbers to see how far off our predictions are.

# Forecasting of cumulative cases 
dates = seq(as.Date("2020-03-01"), as.Date("2020-09-06"), by=1)

adf.test(cumulcase)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cumulcase
## Dickey-Fuller = -1.5571, Lag order = 5, p-value = 0.7615
## alternative hypothesis: stationary
chiArimaExp <- auto.arima(cumulcase[1:169], seasonal = FALSE)
chicaForecast <- forecast(chiArimaExp, h=21)
plot(chicaForecast)
lines(cumulcase)

accuracy(chicaForecast,cumulcase[170:190])
##                      ME     RMSE      MAE       MPE      MAPE      MASE
## Training set   3.023006 136.9404  97.2924 2.3178667 2.6674173 0.2443838
## Test set     570.670226 628.7905 570.6702 0.8016514 0.8016514 1.4334375
##                     ACF1
## Training set -0.01075349
## Test set              NA
#range(confirmed$Illinois[201:230])
#range(forecasted$mean)

range(cumulcase)
## [1]     0 73117
range(chicaForecast$mean)
## [1] 67180.90 72950.84
differ <- function(nums){
  numdiff = c()
  x = 1
  y = 2
  idx = 1
  while(y<length(nums)){
    temp = nums[y] - nums[x]
    numdiff[idx] = temp
    x= x+1
    y = y+1
    idx = idx+1
  }
  return(numdiff)
}
diffs = differ(chicaForecast$mean)
mean(diffs)
## [1] 289.1688
library(COVID19)
## Warning: package 'COVID19' was built under R version 4.0.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(forecast)
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
library(deSolve)
## Warning: package 'deSolve' was built under R version 4.0.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x purrr::%@%()             masks rlang::%@%()
## x lubridate::as.difftime() masks base::as.difftime()
## x purrr::as_function()     masks rlang::as_function()
## x lubridate::date()        masks base::date()
## x plotly::filter()         masks dplyr::filter(), stats::filter()
## x purrr::flatten()         masks rlang::flatten()
## x purrr::flatten_chr()     masks rlang::flatten_chr()
## x purrr::flatten_dbl()     masks rlang::flatten_dbl()
## x purrr::flatten_int()     masks rlang::flatten_int()
## x purrr::flatten_lgl()     masks rlang::flatten_lgl()
## x purrr::flatten_raw()     masks rlang::flatten_raw()
## x lubridate::intersect()   masks base::intersect()
## x purrr::invoke()          masks rlang::invoke()
## x dplyr::lag()             masks stats::lag()
## x purrr::list_along()      masks rlang::list_along()
## x purrr::modify()          masks rlang::modify()
## x purrr::prepend()         masks rlang::prepend()
## x lubridate::setdiff()     masks base::setdiff()
## x purrr::splice()          masks rlang::splice()
## x dplyr::src()             masks Hmisc::src()
## x dplyr::summarize()       masks Hmisc::summarize()
## x lubridate::union()       masks base::union()
#USA <- covid19(c("US"), level=2)
#write.csv(USA,"USA.CSV",row.names = FALSE)
#US = read.csv("USA.CSV")
# Only 2:21 and administrative_area_level_2 are important 
# Read in the new data and subset based on categories 

USA = read.csv("NewUSA.csv")
date = seq(as.Date("2020-03-16"), as.Date("2020-10-31"), by = "days")


columnanme = c("State","Value")
columnanme = c(columnanme,date)
columnanme[233] = "Grand Total"

tests = USA[USA$Value=="Sum of tests",]
tests = tests[,-2]
states = tests$State
tt = t(tests);tt = as.data.frame(tt)
index = seq(1,56,by=1)
colnames(tt) <- states
tt = tt[-1,]
tt = tt[-231,]
tt[states] = sapply(tt,as.numeric); tt = as.data.frame(tt)
tt["Date"] = date
row.names(tt) <- NULL


subsetdata <- function(data, name,columnVal){
  name <- data[data$Value== columnVal,]
  name = name [,-2]
  stat = name$State
  names = t(name); names= as.data.frame(names)
  colnames(names) <- stat
  names = names[-1,]
  names = names[-231,]
  names[stat] = sapply(names,as.numeric); names = as.data.frame(names)
  #names["Date"] = date
  row.names(names) = NULL
  return(names)
}

test = subsetdata(USA, "tests","Sum of tests")
icu = subsetdata(USA, "icu","Sum of icu")
confirmed = subsetdata(USA, "confirmed","Sum of confirmed")
recovered = subsetdata(USA, "recovered","Sum of recovered")
hospitalized = subsetdata(USA, "hospitalized","Sum of hosp")
death = subsetdata(USA, "death","Sum of deaths")
venting = subsetdata(USA, "venting","Sum of vent")
# Ordering the subsets 

order_date <- function(dataset){
  newdata = dataset[,order(-dataset[230,])]
  newdata["Date"] = date
  return(newdata)
}

test = order_date(test)
icu = order_date(icu)
confirmed = order_date(confirmed)
recovered = order_date(recovered)
hospitalized = order_date(hospitalized)
death = order_date(death)
venting = order_date(venting)
# Top 10 states with highest cumulative cases to 31st October 

top10case <- confirmed[,1:10]; top10case["Date"] = date

fig10 <- plot_ly(x=as.Date(top10case$Date))
names = colnames(top10case)
for(i in names[-11]){
  fig10 <- add_trace(fig10,x=as.Date(top10case$Date), y=top10case[,i], mode="lines", name=i)
}
fig10
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest cumulative test done to 31st October 

top10test <- test[,1:10]; top10test["Date"] = date

fig11 <- plot_ly(x=as.Date(top10test$Date))
names = colnames(top10test)
for(i in names[-11]){
  fig11 <- add_trace(fig11,x=as.Date(top10test$Date), y=top10test[,i], mode="lines", name=i)
}
fig11
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest cumulative hospitalized cases to 31st October 

top10hosp <- hospitalized[,1:10]; top10hosp["Date"] = date

fig12 <- plot_ly(x=as.Date(top10hosp$Date))
names = colnames(top10hosp)
for(i in names[-11]){
  fig12 <- add_trace(fig12,x=as.Date(top10hosp$Date), y=top10hosp[,i], mode="lines", name=i)
}
fig12
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest ventilation to 31st October 

top10vent <- venting[,1:10]; top10vent["Date"] = date

fig13 <- plot_ly(x=as.Date(top10vent$Date))
names = colnames(top10vent)
for(i in names[-11]){
  fig13 <- add_trace(fig13,x=as.Date(top10vent$Date), y=top10vent[,i], mode="lines", name=i)
}
fig13
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 Death states to 31st October 
top10death <- death[,1:10]; top10death["Date"] = date

fig14 <- plot_ly(x=as.Date(top10death$Date))
names = colnames(top10death)
for(i in names[-11]){
  fig14 <- add_trace(fig14,x=as.Date(top10death$Date), y=top10death[,i], mode="lines", name=i)
}
fig14
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Forecasting of Illinois cumulative cases 

acf_case = acf(diff(diff(confirmed$Illinois)))

adf.test(confirmed$Illinois)
## Warning in adf.test(confirmed$Illinois): p-value greater than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  confirmed$Illinois
## Dickey-Fuller = 1.2115, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
p = ggplot(data = confirmed, aes(x=as.Date(Date),y=Illinois)) + geom_point()
p + geom_smooth(se=TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

arima_case = auto.arima(confirmed$Illinois[1:200])
forecasted = forecast(arima_case, h=30)

plot(forecasted)
lines(confirmed$Illinois, col='red')

accuracy(forecasted,confirmed$Illinois[201:230])
##                       ME       RMSE        MAE       MPE      MAPE       MASE
## Training set    35.30461   493.3245   324.3018 0.6896976 0.9663941  0.2166832
## Test set     17651.08794 24651.7395 17659.9893 4.6794212 4.6823136 11.7995738
##                    ACF1
## Training set 0.07694218
## Test set             NA
range(confirmed$Illinois[201:230])
## [1] 300385 416559
range(forecasted$mean)
## [1] 299919.9 357656.1
# SIR model of bad case scenario
iniv = c(S=1300,I=1,R=0)
parameters = c(gammaV=(1/14), beta=0.80)
time = seq(from=1,t=365,by=1)
SIR_model <- function(time, initial, parameter){
  with(as.list(c(initial,parameter)),{
  N = S+I+R
  dS = -(beta*S*I)/N
  dI = (beta*S*I)/N - (gammaV*I)
  dR = (gammaV*I) 
  return(list(c(dS,dI,dR)))
  }
  )
}

output = ode(y=iniv,func=SIR_model,parms=parameters,times=time)

output = as.data.frame(output)

ggplot(data = output, aes(x=time)) + geom_line(aes(y=S),color="blue") + geom_line(aes(y=I),color="Red") + geom_line(aes(y=R),color ='green') + geom_hline(yintercept = 400) + labs(x='Days passed',y='Population')

# SIR model of good case scenario
iniv = c(S=1300,I=1,R=0)
parameters = c(gammaV=(1/14), beta=0.10)
time = seq(from=1,t=365,by=1)
SIR_model <- function(time, initial, parameter){
  with(as.list(c(initial,parameter)),{
  N = S+I+R
  dS = -(beta*S*I)/N
  dI = (beta*S*I)/N - (gammaV*I)
  dR = (gammaV*I) 
  return(list(c(dS,dI,dR)))
  }
  )
}

output = ode(y=iniv,func=SIR_model,parms=parameters,times=time)

output = as.data.frame(output)

ggplot(data = output, aes(x=time)) + geom_line(aes(y=S),color="blue") + geom_line(aes(y=I),color="Red") + geom_line(aes(y=R),color ='green') + geom_hline(yintercept = 400) + labs(x='Days passed',y='Population')